# -*- coding : utf-8 -*-


import re
import io
import os
import tqdm
import json
import copy
import random
import openai
import pymysql
import requests
import threadpool
import uuid
import glob

# from .util_tool import utils
from pydub import AudioSegment
from treelib import Tree
from openpyxl import load_workbook, Workbook
from concurrent.futures import ThreadPoolExecutor
import logging

# Instruction template for the "reply selection" training samples.
# It carries three positional `{}` placeholders; path_to_dialogue_data() fills
# them with str.format(finish_logic, run_logic, '|'-joined candidate replies).
# The (Chinese) text tells the model to output "任务完成。" when the task is
# done and otherwise to pick exactly one of the candidate replies.
dialogue_prompt = '''任务的要求为：
{}
任务的运转逻辑是：
{}
任务的可选择话术为：
{}
你需要记住以下几点：
1.当任务的可选择话术为空时，判断任务已完成。
2.当任务的可选择话术不为空，根据对话记录，任务的要求判断任务是否完成。
3.当判断任务已完成，输出"任务完成。"。
4.当判断任务未完成，必须只能在可选择话术中选择一句话，选择的逻辑参考"任务的运转逻辑"和"任务的要求"。输出可选择话术中选择的那句话。
输入：'''

# Instruction template for intent-labelling (NLU) samples: the (Chinese) text
# asks the model to label the meaning of the user's last utterance and give a
# reason. It has two positional `{}` placeholders.
# NOTE(review): in the visible part of this file the only reference to this
# template sits inside a disabled (string-literal) block — confirm it is still needed.
nlu_prompt = '''任务的要求为：
{}
任务的运转逻辑是：
{}
你需要记住以下几点：
1.根据输入的对话记录，分析最后一次用户的表述的含义，给用户表述的含义打一个标签并给出理由。
输入：'''

def generate_uuid():
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)

def _make_w_io_base(f, mode: str):
    if not isinstance(f, io.IOBase):
        f_dirname = os.path.dirname(f)
        if f_dirname != "":
            os.makedirs(f_dirname, exist_ok=True)
        f = open(f, mode=mode)
    return f

def _make_r_io_base(f, mode: str):
    if not isinstance(f, io.IOBase):
        f = open(f, mode=mode)
    return f

def jdump(obj, f, mode="w", indent=4, default=str):
    """Dump a str, dict or list to a file.

    Dicts and lists are serialised as JSON (non-ASCII characters are kept
    as-is); a plain string is written verbatim.

    Args:
        obj: The object to be written (``dict``, ``list`` or ``str``).
        f: A path on disk, or an already opened ``io.IOBase`` object.
        mode: Mode for opening the file when ``f`` is a path.
        indent: Indent for storing json dictionaries.
        default: A function to handle non-serializable entries; defaults to `str`.

    Raises:
        ValueError: If ``obj`` is neither a dict, a list nor a str.
    """
    # The context manager guarantees the handle is closed even when
    # serialisation fails or the type is rejected (it used to leak then).
    with _make_w_io_base(f, mode) as out:
        if isinstance(obj, (dict, list)):
            json.dump(obj, out, indent=indent, default=default, ensure_ascii=False)
        elif isinstance(obj, str):
            out.write(obj)
        else:
            raise ValueError(f"Unexpected type: {type(obj)}")

def jload(f, mode="r"):
    """Load a .json file and return the decoded object.

    Args:
        f: A path on disk, or an already opened ``io.IOBase`` object.
        mode: Mode for opening the file when ``f`` is a path.

    Returns:
        Whatever ``json.load`` decodes from the file (typically a dict or list).
    """
    # Close the handle even when the content is not valid JSON.
    with _make_r_io_base(f, mode) as src:
        return json.load(src)

# Generate training / test data for reply ("script") selection
def bot_document_to_dialogue(file_path):
    """Build reply-selection training samples from a bot flow workbook.

    Reads the first worksheet of ``file_path`` row by row, groups the rows by
    task name, builds one ``treelib.Tree`` per task from the dotted stage ids,
    walks the root-to-leaf paths of every tree and converts them into
    instruction/input/output samples via ``path_to_dialogue_data``.

    Side effects: prints the number of generated samples and writes
    './v2_train_data_360UJD首贷.json' and './v2_nlu_train_data_360UJD首贷.json'
    into the current working directory. Returns ``None``.

    NOTE(review): every worksheet row is processed, including the first one —
    confirm whether the sheet has a header row that should be skipped.
    """

    wb = load_workbook(file_path)
    ws = wb[wb.sheetnames[0]]
    FAQ_off_list, FAQ_name_list, FAQ_answer_dict = bot_document_to_faq(file_path)

    # Matches a bracketed marker such as 【...】 inside a simplified reply.
    task_transfer_pattern = re.compile(r'【.*】')

    # NOTE(review): task_transfer is filled below but never read in this function.
    task_transfer = dict()
    task_dict = dict()
    task_name = ''
    slot_id = 1
    for i, row in enumerate(ws.values):
        # An empty first cell means "same task as the previous row".
        task_name = row[0] if row[0] else task_name
        # Sentinel row: stop reading the sheet.
        if task_name == '流程文档结束':
            break

        task_finish_logic = row[1]
        task_run_logic = row[2]

        task_stage = str(row[3])
        task_condition = row[4]
        task_wav_no = row[5]
        task_seat_response = row[6]
        # NOTE(review): raises AttributeError when column 8 (row[7]) is empty.
        task_simple_seat_response = row[7].replace('【faq答案】+', 'faq')
        task_label = '['+row[9]+']' if row[9] else ''
        task_response_label = '[' + row[8] + ']' if row[8] else ''
        # NOTE(review): task_break_label is computed but not used afterwards.
        task_break_label = row[13].replace('开场支持打断的标签：\n', '') if row[13] else ''
        # print(task_name, task_stage, task_condition)
        # Rows whose stage is the global fallback marker are ignored.
        if task_stage == '整体兜底':
            continue


        # Initialise the per-task record (a 7-slot list, addressed by index below)
        if task_name not in task_dict:
            task_dict[task_name] = [
                # [0] entry conditions -> [simplified reply, full reply]
                dict(),
                # [1] selectable simplified replies
                list(),
                # [2] real, complete replies
                list(),
                # [3] logic for deciding whether the task is finished
                task_finish_logic,
                # [4] task run logic
                task_run_logic,
                # [5] flow routes stored as trees
                list(),
                # [6] label of each reply (may be empty)
                list()
            ]

        # Build the task dictionary and the task-transfer dictionary
        if '.' not in task_stage:
            # Top-level stage: one entry per line of the condition cell.
            task_conditions = task_condition.split('\n') if '\n' in task_condition else [task_condition, ]
            for condition in task_conditions:

                task_dict[task_name][0][condition] = [task_simple_seat_response,
                                                      '《槽位id：{}》'.format(slot_id)+'@#'+task_wav_no+'||'+task_seat_response+'#@']
                task_dict[task_name][1].append(task_simple_seat_response)
                # NOTE(review): this literal uses an ASCII ':' while the other
                # slot-id literals use the full-width '：' — confirm which is intended.
                task_dict[task_name][2].append(
                    '《槽位id:{}》'.format(slot_id) + '@#' + task_wav_no + '||' + task_seat_response + '#@')
                task_dict[task_name][6].append(task_response_label)
                slot_id += 1

        elif '【' in task_simple_seat_response and '】' in task_simple_seat_response:
            result = task_transfer_pattern.search(task_simple_seat_response)
            if result.group() == '【faq答案】' and \
                    task_simple_seat_response.replace('【faq答案】+', 'faq') not in task_dict[task_name][1]:
                task_dict[task_name][1].append(task_simple_seat_response.replace('【faq答案】+', 'faq'))
                task_dict[task_name][2].append('《槽位id：{}》'.format(slot_id)+task_label+'@#'+task_wav_no+'||'+task_seat_response+'#@')
                task_dict[task_name][6].append(task_response_label)
                slot_id += 1
            else:
                # Any other bracketed marker: record "task+condition" -> marker text.
                result = result.group().replace('【', '').replace('】', '')
                if '\n' in task_condition:
                    for condition in task_condition.split('\n'):
                        task_transfer[task_name+'+'+condition] = result
                elif '|' in task_condition:
                    task_transfer[task_name+'+'+task_condition.split('|')[0]] = result
                else:
                    task_transfer[task_name+'+'+task_condition] = result
        elif task_simple_seat_response not in task_dict[task_name][1]:
            task_dict[task_name][1].append(task_simple_seat_response)
            task_dict[task_name][2].append('《槽位id：{}》'.format(slot_id)+task_label+'@#'+task_wav_no+'||'+task_seat_response+'#@')
            task_dict[task_name][6].append(task_response_label)
            slot_id += 1

        # Build the bot tree
        # NOTE(review): first_stage is appended to below but never read.
        first_stage = list()
        # When a stage id contains placeholders, expand them. The stage cell is
        # then multi-line: line 0 is the id template, the following lines hold
        # comma-separated values after 'x=', 'y=' and 'z='.
        if 'x' in task_stage and 'y' in task_stage and 'z' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for z_num in task_stage.split('\n')[3].split('z=')[-1].split(','):
                        for tree in task_dict[task_name][5]:
                            # tag=condition, identifier=expanded id,
                            # parent=expanded id minus its last dotted part,
                            # data=simplified reply
                            tree.create_node(
                                task_condition,
                                task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num),
                                '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num).split('.')[:-1]),
                                task_simple_seat_response.replace('【faq答案】+', 'faq'),
                            )
        elif 'x' in task_stage and 'y' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for tree in task_dict[task_name][5]:
                        tree.create_node(
                            task_condition, task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num),
                            '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).split('.')[:-1]),
                            task_simple_seat_response.replace('【faq答案】+', 'faq'),
                        )
        elif 'x' in task_stage:
            for num in task_stage.split('\n')[1].split('x=')[-1].split(','):
            # for num in task_stage.replace(')', '').split('x=')[-1].split(','):
                for tree in task_dict[task_name][5]:
                    # print(task_stage.split('(')[0].replace('x', num))
                    tree.create_node(
                        task_condition, task_stage.split('\n')[0].replace('x', num),
                        '.'.join(task_stage.split('\n')[0].replace('x', num).split('.')[:-1]),
                        task_simple_seat_response.replace('【faq答案】+', 'faq'))
        else:
            stage_level = task_stage.count('.')
            if stage_level == 0:
                # A stage id without dots starts a new tree for this task.
                tree = Tree()
                first_stage.append(task_simple_seat_response.replace('【faq答案】+', 'faq'))
                # tag = entry condition, identifier = node id, parent = id of the
                # previous node, data = action (reply text or "task finished")
                tree.create_node(task_condition, task_stage, None, task_simple_seat_response.replace('【faq答案】+', 'faq'))
                # task_dict[task_name][5].append(tree)
                # The tree list is replaced, so only the latest root is kept per task.
                task_dict[task_name][5] = [tree, ]

            else:
                for tree in task_dict[task_name][5]:
                    tree.create_node(task_condition, task_stage, '.'.join(task_stage.split('.')[:-1]),
                                     task_simple_seat_response.replace('【faq答案】+', 'faq'))

    # Append five fixed child nodes (data '【促成】') under every leaf of the
    # '收集资金用途' task tree.
    # NOTE(review): raises KeyError when the workbook has no task with that name.
    for tree in task_dict['收集资金用途'][5]:
        # print(len(tree.leaves()))
        # print(tree.leaves())
        for leaf in tree.leaves():

            for i, condition in enumerate(['金额', '肯定', '否定', '其他', 'FAQ']):
                tree.create_node(
                    condition, leaf.identifier+'.'+str(i+1), leaf.identifier,
                    '【促成】'
                )
        # print(len(tree.leaves()))
        # print(tree.leaves())


    # Disabled debugging code (kept as a bare string literal): print paths
    '''tree = task_dict['核身'][5][0]
    tree = Tree()
    node_id_list = ['1', ]
    print(tree.get_node('1'))
    print(tree.children('1')[0] if tree.children('1') else '')
    print(tree.children(tree.children('1')[0].identifier))
    # 获取所有叶子节点的路径
    print(len(tree.paths_to_leaves()))
    for paths in tree.paths_to_leaves():
        print(paths)
        for path_id in paths:
            print( '用户：{}\n销售员：{}'.format(tree.get_node(path_id).tag, tree.get_node(path_id).data) )
    
    print(task_dict)
    print(task_transfer)'''

    # Build the reply-selection training data
    input_list = list()
    insert_data_list = list()
    # NOTE(review): nlu_data_list is never filled (the code that did so is the
    # disabled string block below), so the second JSON file holds an empty list.
    nlu_data_list = list()
    task_paths = dict()
    for task_name in tqdm.tqdm(task_dict):
        for tree in task_dict[task_name][5]:
            task_paths[task_name] = list()
            for paths in tree.paths_to_leaves():
                # if task_name == '收集资金用途' or task_name == '收集用户当前用款场景':
                #     print(tree.get_node(paths[-1]).data)

                # Reply-selection training data
                if len(paths) >= 3:
                    # Cut the path into its first two nodes plus every window
                    # of three consecutive nodes.
                    paths_list = [paths[:2], ]
                    # paths_list = list()
                    for i in range(len(paths) - 2):
                        sublist = paths[i:i + 3]
                        paths_list.append(sublist)

                    # NOTE: this loop re-binds the outer loop variable `paths`.
                    for paths in paths_list:
                        if paths not in task_paths[task_name]:
                            task_paths[task_name].append(paths)
                            # if paths == ['1', '1.5', '1.5.4']:
                            #     print(paths)
                            data_list, input_list = path_to_dialogue_data(
                                paths, task_dict, task_name, tree, input_list, FAQ_name_list, dialogue_prompt)
                            insert_data_list.extend(data_list)

                else:
                    if paths not in task_paths[task_name]:
                        task_paths[task_name].append(paths)
                        # if paths == ['1', '1.5', '1.5.4']:
                        #     print(paths)
                        data_list, input_list = path_to_dialogue_data(
                            paths, task_dict, task_name, tree, input_list, FAQ_name_list, dialogue_prompt)
                        insert_data_list.extend(data_list)

                # Disabled code (bare string literal): labelling training data
                '''paths_list = list()
                for i in range(len(paths) - 1):
                    sublist = paths[i:i + 2]
                    paths_list.append(sublist)

                for paths in paths_list:
                    data_list, input_list = path_to_dialogue_data(
                        paths, task_dict, task_name, tree, input_list, FAQ_name_list, nlu_prompt)
                    nlu_data_list.extend(data_list)'''


    random.shuffle(insert_data_list)

    print(len(insert_data_list))
    print(len(nlu_data_list))
    jdump(insert_data_list, './v2_train_data_360UJD首贷.json')
    jdump(nlu_data_list, './v2_nlu_train_data_360UJD首贷.json')


def path_to_dialogue_data(paths, task_dict, task_name, tree, input_list, FAQ_name_list, prompt):
    """Turn one tree path into instruction/input/output training samples.

    Args:
        paths: Node identifiers of ``tree``, in walking order.
        task_dict: Per-task records; index 0 holds the entry conditions,
            index 1 the selectable replies, indexes 3 and 4 the two texts
            that are substituted into ``prompt``.
        task_name: Key of the task inside ``task_dict``.
        tree: Object exposing ``get_node(id)`` whose result has ``tag``
            (user side) and ``data`` (sales side) attributes.
        input_list: Inputs emitted so far; used for de-duplication and
            extended in place.
        FAQ_name_list: FAQ names used to expand nodes whose tag contains 'FAQ'.
        prompt: Template with three positional ``{}`` placeholders.

    Returns:
        ``(samples, input_list)`` where ``samples`` is the list of new sample
        dicts and ``input_list`` is the same list object that was passed in.
    """
    transfer_re = re.compile(r'【.*】')
    turn_template = '用户：{}\n销售员：{}\n'

    # Expand the path into every concrete transcript it stands for.
    transcripts = ['']
    for position, node_id in enumerate(paths):
        expanded = list()
        for prefix in transcripts:
            node = tree.get_node(node_id)
            tag = node.tag
            if 'FAQ' in tag and position != 0:
                # Every known FAQ name except the ones listed on the tag.
                skipped = tag.replace('FAQ', '').split('|')
                user_lines = [name for name in FAQ_name_list if name not in skipped]
            elif '\n' in tag and position != 0:
                # One transcript per alternative listed on the tag.
                user_lines = tag.split('\n')
            else:
                user_lines = [tag]
            expanded.extend(prefix + turn_template.format(line, node.data) for line in user_lines)
        transcripts = expanded

    # Paths starting at node '1' are emitted once per entry condition.
    if paths[0] == '1':
        per_entry = list()
        for transcript in transcripts:
            for entry in task_dict[task_name][0]:
                if entry in transcript:
                    per_entry.append(transcript)
                else:
                    per_entry.append(
                        '用户：卡卡卡\n销售员：' + entry + '用户：'.join(transcript.split('用户：')[1:]))
        transcripts = per_entry

    samples = list()
    for transcript in transcripts:
        sales_parts = transcript.split('销售员：')
        # Everything from the first sales turn up to (not including) the last one.
        model_input = '销售员：' + '销售员：'.join(sales_parts[1:-1])

        candidates = list(task_dict[task_name][1])
        instruction = prompt.format(task_dict[task_name][3], task_dict[task_name][4],
                                    '|'.join(candidates))

        # The last sales turn is the expected answer, unless it carries a
        # bracketed marker other than the FAQ one.
        expected = sales_parts[-1].replace('\n', '')
        marker = transfer_re.search(expected)
        if marker and marker.group() != '【faq答案】':
            expected = '任务完成'

        if model_input in input_list:
            continue
        input_list.append(model_input)
        samples.append({
            'instruction': instruction,
            'input': model_input,
            'output': expected,
        })

    return samples, input_list


def bot_document_to_faq(file_path):
    """Read the FAQ worksheet (second sheet) of a bot flow workbook.

    The first row is skipped, as are rows whose first cell is empty.

    Returns:
        A tuple ``(FAQ_off_list, FAQ_name_list, FAQ_answer_dict)``:
        names whose sixth column equals '挂机'; names usable as user
        utterances (special names filtered out, three fixed intents appended);
        and a mapping name -> [reply, wav number, label].
    """
    workbook = load_workbook(file_path)
    sheet = workbook[workbook.sheetnames[1]]

    # Intents that must always be present in the name list.
    always_present = ['不需要-无原因', '不需要-会考虑', '不需要-不缺钱']
    # Names that never go into the name list.
    excluded_names = ['投诉', '别给我打电话了', '你怎么有我号码', '强烈拒绝', '语音信箱',
                      '第一次静音', '第二次静音', '第三次静音', '多次不需要', '多次在忙']

    answers = dict()
    names = list()
    hang_up_names = list()
    for row_no, cells in tqdm.tqdm(enumerate(sheet.values)):
        if row_no == 0 or not cells[0]:
            continue

        name = cells[0]
        wav_no, label, reply = cells[1], cells[2], cells[3]
        sign = cells[5]

        answers[name] = [reply, wav_no, label]

        if not ('轮询' in name or name in excluded_names):
            names.append(name)
        if sign == '挂机':
            hang_up_names.append(name)

    for intent in always_present:
        if intent not in names:
            names.append(intent)

    return hang_up_names, names, answers


def bot_document_to_tree(file_path):
    """Build end-to-end test dialogues from a bot flow workbook.

    Reads the first worksheet of ``file_path``, builds one ``treelib.Tree``
    per (task, entry condition), collects the root-to-leaf paths of every
    tree, chains tasks together through the 【...】 markers found on leaves
    (starting from the '核身' task), and finally writes one row per distinct
    user-utterance sequence to './test_dialogue_data.xlsx'.

    Side effects: prints progress/debug information and writes the workbook
    into the current working directory. Returns ``None``.

    NOTE(review): the task names '收集资金用途' and '核身' are hard-coded;
    a workbook without them raises KeyError.
    """

    wb = load_workbook(file_path)
    ws = wb[wb.sheetnames[0]]
    FAQ_off_list, FAQ_name_list, FAQ_answer_dict = bot_document_to_faq(file_path)
    print(FAQ_name_list)
    # Matches a bracketed marker such as 【...】.
    task_transfer_pattern = re.compile(r'【.*】')

    task_name = ''
    task_dict = dict()
    for i, row in enumerate(ws.values):

        # An empty first cell means "same task as the previous row".
        task_name = row[0] if row[0] else task_name
        # Sentinel row: stop reading the sheet.
        if task_name == '流程文档结束':
            break
        task_finish_logic = row[1]
        task_run_logic = row[2]

        task_stage = str(row[3])
        task_condition = row[4]
        task_wav_no = row[5]
        task_seat_response = row[6]
        # NOTE(review): raises AttributeError when column 8 (row[7]) is empty.
        task_simple_seat_response = row[7].replace('【faq答案】+', 'faq')
        task_label = '[' + row[9] + ']' if row[9] else ''
        task_response_label = '[' + row[8] + ']' if row[8] else ''
        # NOTE(review): task_break_label is computed but not used afterwards.
        task_break_label = row[13].replace('开场支持打断的标签：\n', '') if row[13] else ''
        # Rows whose stage is the global fallback marker are ignored.
        if task_stage == '整体兜底':
            continue

        if task_name not in task_dict:
            task_dict[task_name] = {
                'tree': dict(), # flow routes stored as trees, keyed by entry condition
                'tree_path': list(),
                'enter_simple_response': dict(),
                'simple_response': list(),
                'simple_response_to_response': dict(),
                'next_task': dict()
            }

        # Register the simplified reply (first occurrence wins)
        if task_simple_seat_response not in task_dict[task_name]['simple_response']:
            task_dict[task_name]['simple_response'].append(task_simple_seat_response)
            task_dict[task_name]['simple_response_to_response'][task_simple_seat_response] = {
                'wav_no': task_wav_no,
                'response': task_seat_response,
                'label': task_label,
                'response_label': task_response_label,
            }


        # Build the bot tree
        # When a stage id contains placeholders, expand them. The stage cell is
        # then multi-line: line 0 is the id template, the following lines hold
        # comma-separated values after 'x=', 'y=' and 'z='.
        # Node data is the wav number when present, else the simplified reply.
        if 'x' in task_stage and 'y' in task_stage and 'z' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for z_num in task_stage.split('\n')[3].split('z=')[-1].split(','):
                        for condition in task_dict[task_name]['tree']:
                            tree = task_dict[task_name]['tree'][condition]
                            tree.create_node(
                                task_condition,
                                task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num),
                                '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).replace('z', z_num).split('.')[:-1]),
                                # task_simple_seat_response,
                                task_wav_no if task_wav_no else task_simple_seat_response,
                            )
        elif 'x' in task_stage and 'y' in task_stage:
            for x_num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                for y_num in task_stage.split('\n')[2].split('y=')[-1].split(','):
                    for condition in task_dict[task_name]['tree']:
                        tree = task_dict[task_name]['tree'][condition]
                        tree.create_node(
                            task_condition, task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num),
                            '.'.join(task_stage.split('\n')[0].replace('x', x_num).replace('y', y_num).split('.')[:-1]),
                            # task_simple_seat_response,
                            task_wav_no if task_wav_no else task_simple_seat_response,
                        )
        elif 'x' in task_stage:
            for num in task_stage.split('\n')[1].split('x=')[-1].split(','):
                # for num in task_stage.replace(')', '').split('x=')[-1].split(','):
                for condition in task_dict[task_name]['tree']:
                    tree = task_dict[task_name]['tree'][condition]
                    tree.create_node(
                        task_condition, task_stage.split('\n')[0].replace('x', num),
                        '.'.join(task_stage.split('\n')[0].replace('x', num).split('.')[:-1]),
                        # task_simple_seat_response,
                        task_wav_no if task_wav_no else task_simple_seat_response,
                    )
        else:
            stage_level = task_stage.count('.')
            if stage_level == 0:
                # A stage id without dots starts one new tree per entry condition
                # (one condition per line of the condition cell).
                task_condition = task_condition.split('\n') if '\n' in task_condition else [task_condition, ]
                for condition in task_condition:

                    tree = Tree()
                    # tag = entry condition, identifier = node id, parent = id of the
                    # previous node, data = action (reply or "task finished")
                    tree.create_node(condition, task_stage, None,
                                     # task_simple_seat_response,
                                     task_wav_no if task_wav_no else task_simple_seat_response,)
                    task_dict[task_name]['tree'][condition] = tree

                    task_dict[task_name]['enter_simple_response'][condition] = task_simple_seat_response

            else:
                for condition in task_dict[task_name]['tree']:
                    tree = task_dict[task_name]['tree'][condition]
                    tree.create_node(task_condition, task_stage, '.'.join(task_stage.split('.')[:-1]),
                                     # task_simple_seat_response,
                                     task_wav_no if task_wav_no else task_simple_seat_response,)

    # Append five fixed child nodes (data '【促成】') under every leaf of each
    # '收集资金用途' tree.
    for condition in task_dict['收集资金用途']['tree']:
        tree = task_dict['收集资金用途']['tree'][condition]

        for leaf in tree.leaves():

            # NOTE: this inner loop re-binds `condition`; `tree` was already
            # looked up above, so the outer iteration is not affected.
            for i, condition in enumerate(['金额', '肯定', '否定', '其他', 'FAQ']):
                tree.create_node(
                    condition, leaf.identifier + '.' + str(i + 1), leaf.identifier,
                    '【促成】'
                )

    # Disabled debugging code (bare string literal): show the tree structure
    '''for condition in task_dict['收集资金用途']['tree']:
        tree = task_dict['收集资金用途']['tree'][condition]
        tree.show(idhidden=False)'''

    all_num = 0
    # For every sub-tree collect all root-to-leaf paths
    for task_name in task_dict:
        # NOTE(review): this `break` stops at the named task, so it and every
        # task after it (in insertion order) get no tree_path / next_task —
        # confirm this is intended.
        if task_name == '收集用户当前用款场景':
            break

        for condition in task_dict[task_name]['tree']:
            for paths in task_dict[task_name]['tree'][condition].paths_to_leaves():
                # Flatten the path into [tag, data, tag, data, ...].
                path_list = list()
                for path_id in paths:
                    path_list.append(task_dict[task_name]['tree'][condition].get_node(path_id).tag)
                    path_list.append(task_dict[task_name]['tree'][condition].get_node(path_id).data)

                # Add the path
                task_dict[task_name]['tree_path'].append(path_list)
                # if path_list[-2] == '建材':
                #     print(paths)

                # Add the path that leads to another task: a leaf whose data
                # contains a 【...】 marker. The shortest such path is kept per marker.
                # NOTE(review): assumes the leaf data is a str; re.search raises
                # TypeError otherwise (e.g. a numeric wav number).
                leave_data = task_transfer_pattern.search(task_dict[task_name]['tree'][condition].get_node(paths[-1]).data)
                if leave_data:
                    leave_data = leave_data.group().replace('【', '').replace('】', '')
                    if leave_data not in task_dict[task_name]['next_task'] or \
                        (leave_data in task_dict[task_name]['next_task'] and len(task_dict[task_name]['next_task'][leave_data]) > len(path_list)):

                        task_dict[task_name]['next_task'][leave_data] = path_list


        all_num += len(task_dict[task_name]['tree_path'])
        print(task_name)
        print(task_dict[task_name]['tree_path'])
        print(task_dict[task_name]['next_task'])

    print(all_num)

    # Build the data starting from the opening of the '核身' task
    finish_paths_list = list()
    unfinish_paths_list = list()

    finish_paths_list.extend(task_dict['核身']['tree_path'])
    # print(task_dict['核身']['tree_path'])
    for next_task in task_dict['核身']['next_task']:
        unfinish_paths_list.append(
            task_dict['核身']['next_task'][next_task]
        )
    # print(finish_paths_list)
    # print(unfinish_paths_list)
    # Keep extending paths that end in a 【task】 marker with the paths of that
    # task until no such path is left.
    # NOTE(review): nothing here guards against tasks that point at each other
    # in a cycle — confirm the flow document cannot contain one.
    while unfinish_paths_list:
        paths_list = list()
        for path in tqdm.tqdm(unfinish_paths_list):
            task_name = task_transfer_pattern.search(path[-1]).group().replace('【', '').replace('】', '')
            for next_task_path in task_dict[task_name]['tree_path']:
                # Drop the trailing marker of the current path and the leading
                # tag of the next task's path before joining them.
                finish_path = copy.deepcopy(path[:-1])
                finish_path.extend(next_task_path[1:])
                if finish_path not in finish_paths_list:
                    finish_paths_list.append(finish_path)
                    # print(finish_path)

            for next_task in task_dict[task_name]['next_task']:
                finish_path = copy.deepcopy(path[:-1])
                finish_path.extend(task_dict[task_name]['next_task'][next_task][1:])
                paths_list.append(finish_path)

        unfinish_paths_list = paths_list

    # print(finish_paths_list)
    # print(unfinish_paths_list)
    # print(len(finish_paths_list))


    path_list = list()
    test_path_list= list()
    for path in finish_paths_list:
        # A path still ending in a 【task】 marker: replace the marker with the
        # wav number of that task's entry reply
        if '【' in path[-1] and '】' in path[-1]:

            # Text after the closing bracket selects the entry condition.
            enter_condition = path[-1].split('】')[-1]
            # enter_condition = path[-2]
            print(path)
            task_name = task_transfer_pattern.search(path[-1]).group().replace('【', '').replace('】', '')
            print(enter_condition, task_name)
            print(task_dict[task_name]['enter_simple_response'])
            if enter_condition and enter_condition in task_dict[task_name]['enter_simple_response']:
                simple_response = task_dict[task_name]['enter_simple_response'][enter_condition]
            else:
                # Fall back to the '首句' entry condition.
                simple_response = task_dict[task_name]['enter_simple_response']['首句']

            wav_no = task_dict[task_name]['simple_response_to_response'][simple_response]['wav_no']
            print(wav_no)
            path[-1] = wav_no

        # Generate the test data
        user_response = list()
        for i, row in enumerate(path[2:]):
            # Normalise the element to a concrete test value
            if 'FAQ' in row:
                row = 'FAQ'
            if '不需要-无原因' in row:
                row = '不需要'
            if '在忙-无原因' in row:
                row = '在忙'
            if row == '静音':
                row = '@@quiet@@'
            if '\n' in row:
                row = row.split('\n')[0]
            if row in ['平台银行用款方案', '平台产品']:
                row = '花呗'
            if row == '金额':
                row = '三万'
            if row == '其他':
                row = '不明'
            if row in ['资金周转', '生活消费']:
                row = '肯定'

            # Elements at even offsets are accumulated into user_response; at
            # odd offsets the accumulated sequence is emitted (once) together
            # with the current element.
            if i % 2 != 0:
                # if ','.join(user_response) == '什么平台,不需要-使用其他平台,肯定,建材':
                #     print(','.join(user_response)+'+'+row)

                if ','.join(user_response) not in path_list:
                    test_path_list.append(','.join(user_response)+'+'+row)
                    path_list.append(','.join(user_response))
                    # print(path)
                    # print(','.join(user_response), row)
            else:
                user_response.append(row)



    wb = Workbook()
    ws = wb.active

    # Columns 3 and 4 receive the text before / after the first '+'.
    for paths in tqdm.tqdm(test_path_list):
        ws.append(['', '', paths.split('+')[0], paths.split('+')[1]])

    wb.save('./test_dialogue_data.xlsx')


def get_wav_online_content(company_id, tts_model, timeout=30):
    """Fetch the online recording list and map file names to their text.

    Args:
        company_id: Value substituted for the ``companyId`` query parameter.
        tts_model: Value substituted for the ``ttsModel`` query parameter.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block forever on an unresponsive server.

    Returns:
        ``{'<company_id>_<tts_model>': {file_name: content, ...}}`` built from
        ``response['data'][0]['list']``.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    url = 'http://work.xi-ai.com/admin/soundRecording/getRecordManageList?companyId={}&ttsModel={}'.format(
        company_id, tts_model)
    wav_key = '{}_{}'.format(company_id, tts_model)

    response = json.loads(requests.get(url, timeout=timeout).text)

    contents = dict()
    for wav_data in response['data'][0]['list']:
        file_name = wav_data['fileName']
        content = wav_data['content']
        print(file_name, content)
        contents[file_name] = content

    return {wav_key: contents}


def bot_document_seat_response(file_path):
    """Collect the distinct simplified sales replies of the first worksheet.

    Rows with an empty eighth column and replies containing '再见' are
    skipped. A reply with a 【...】 marker is kept only when the marker
    contains '+', in which case the marker text before the '+' (brackets
    removed) is used instead of the reply.

    Returns:
        The distinct replies as a list (built from a set, so unordered).
    """
    workbook = load_workbook(file_path)
    sheet = workbook[workbook.sheetnames[0]]

    marker_re = re.compile(r'【.*】')
    replies = set()
    for cells in sheet.values:
        raw_reply = cells[7]
        if not raw_reply:
            continue

        reply = raw_reply.replace('【faq答案】+', 'faq')
        if '再见' in reply:
            continue

        found = marker_re.search(reply)
        if found:
            marker = found.group()
            if '+' not in marker:
                continue
            reply = marker.split('+')[0].replace('【', '').replace('】', '')

        replies.add(reply)

    print(replies)
    return list(replies)


def request_chatgpt(content):
    """Send ``content`` as a single system message and return the reply text.

    Uses the Azure chat-completion endpoint configured below and returns the
    message content of the first choice.
    """
    messages = [{'role': 'system', 'content': content}]

    # NOTE(review): the API key is hard-coded in source. It should be rotated
    # and loaded from configuration / the environment instead.
    request_options = dict(
        api_type="azure",
        api_version="2023-03-15-preview",
        api_base="https://lingxi-openai.openai.azure.com",
        api_key="45a5ee249f364e208dd950f87ab5aba7",
        engine="gpt-35",
        messages=messages,
        temperature=0.8,
        max_tokens=2048,
        request_timeout=10,
    )
    completion = openai.ChatCompletion.create(**request_options)

    first_choice = completion["choices"][0]
    return first_choice['message']['content']


def dialogue_data():
    """Export de-duplicated user utterances of recent calls to an Excel file.

    Queries up to 1000 call sessions from the database, downloads the dialogue
    detail of each session over HTTP, normalises every user utterance with
    ``process_queries`` and writes './2132标注测试.xlsx' with two sheets:
    the distinct utterances (customer id, session id, text, intent, attitude)
    and the utterances seen more than once together with their count.

    Side effects: database and HTTP access, prints the number of sessions,
    writes the workbook into the current working directory. Returns ``None``.
    """
    sql = '''
select dm_session_id, customer_id 
from ods_outbound_data_platform.outbound_call_result 
where call_start_time >= "2023-10-08 00:00:00" 
and call_start_time <= "2023-10-08 23:00:00" 
and robot_answer_duration > 30 
and company_id = 2132 
and call_status in ("normalConnection", "transferFail") 
limit 1000
'''
    # and dm_version in ("白条京东商城批次0815", "金条纯机-复贷-通用版")

    # NOTE(review): host and credentials are hard-coded in source; they should
    # be moved to configuration / the environment.
    connection = pymysql.connect(
        host="39.103.215.119",
        # host="am-8vbwn20384jdq3vq185480.zhangbei.ads.aliyuncs.com",
        port=3308,
        # port=3306,
        user="ds_user",
        passwd="Moxi123#",
        # db="data_center_temp",
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor
    )
    # Release the cursor and the connection as soon as the rows are fetched,
    # also when the query fails (they used to stay open).
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(sql)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    dm_url = 'https://work.xi-ai.com/dataCenter/dm/detail?sessionId={}'
    print(len(results))

    wb = Workbook()
    ws = wb.active
    ws.append([
        '客户ID', 'session_id', '语句', '意图', '态度'
    ])
    # Normalised utterance -> number of occurrences. It doubles as the
    # "already written" set, giving O(1) membership instead of a list scan.
    query_dict = dict()
    for result in tqdm.tqdm(results):
        session_id = result['dm_session_id']
        customer_id = result['customer_id']

        response = json.loads(requests.get(dm_url.format(session_id)).text)['data']

        for res in response:
            # Only user turns that carry an NLU result are exported.
            if res['speakerType'] != 'USER' or not res['idlResultJson']:
                continue

            result_json = json.loads(res['idlResultJson'])
            intent = result_json['standardQuery'] if result_json['standardQuery'] else "NOINTENT"
            attitude = result_json['originalAttitude']
            query = process_queries(result_json['query'])

            if query not in query_dict:
                # First occurrence: write the row.
                ws.append([
                    customer_id, session_id, query, intent, attitude
                ])
            query_dict[query] = query_dict.get(query, 0) + 1

    ws_2 = wb.create_sheet('语料出现次数')
    ws_2.append([
        '语句', '次数'
    ])

    for query in tqdm.tqdm(query_dict):
        if query_dict[query] > 1:
            ws_2.append([
                query, query_dict[query]
            ])

    wb.save('./2132标注测试.xlsx')


def process_queries(current_query, ):
    """Clean a transcribed utterance by collapsing stuttered repetitions.

    The cleaning runs in four steps:

    1. Adjacent repeats of a 1-5 character chunk are collapsed. A chunk that
       compares inside the range U+4E00..U+9FFF (CJK ideographs) is kept
       once; any other chunk (latin letters, digits, punctuation, ...) is
       kept twice. The scan is repeated until a pass collapses no CJK chunk.
    2. When the same 1-4 characters appear on both sides of a punctuation
       mark, the copy in front of the mark is dropped.
    3. Selected pairs of consecutive punctuation marks are replaced by a
       single full-width comma.
    4. A leading full-width comma or full stop is removed.

    Args:
        current_query: The raw utterance text.

    Returns:
        The cleaned utterance. An empty input yields an empty string.
    """
    # Step 1: collapse adjacent repeated chunks.
    collapsed_cjk = True
    while collapsed_cjk:
        collapsed_cjk = False
        pieces = []
        i = 0

        while i < len(current_query):
            repeat_num = 0
            # Try the shortest chunk first; stop at the first width that repeats.
            for j in range(1, 6):
                target = current_query[i:i + j]
                while target == current_query[i + j * (repeat_num + 1):i + j * (repeat_num + 2)]:
                    repeat_num += 1
                if repeat_num:
                    break

            if not repeat_num:
                pieces.append(current_query[i])
                i += 1
                continue

            if u'\u4e00' <= target <= u'\u9fff':
                # CJK chunk: keep a single copy and rescan, because the
                # shortened text may expose a new repetition.
                pieces.append(target)
                collapsed_cjk = True
            else:
                # Non-CJK chunk: keep two copies.
                pieces.append(target * 2)
            # Skip the chunk together with all of its repetitions.
            i += j * (repeat_num + 1)
        current_query = ''.join(pieces)

    # Step 2: merge identical text separated only by a punctuation mark.
    merged = ''
    for i, char in enumerate(current_query):
        if char in ('，', '。', '？', '！', '、'):
            for j in range(1, 5):
                if current_query[max(i - j, 0):i] == current_query[i + 1:i + j + 1]:
                    # Drop the copy already emitted in front of the mark.
                    merged = merged[:-j]
                    break
        merged += char
    current_query = merged

    # Step 3: squash doubled punctuation into one comma.
    for point_sign in ['？。', '？，', '，。', '，，', '。，', '。。']:
        current_query = current_query.replace(point_sign, '，')

    # Step 4: strip leading punctuation. The emptiness check prevents the
    # IndexError that indexing [0] raises for an empty utterance.
    if current_query and current_query[0] in ('，', '。'):
        current_query = current_query[1:]

    return current_query


def baixin_wav(file_path='./百信录音文件10-24To10-25.json'):
    """Download call recordings listed in a JSON export into ``./baixin_audio/``.

    The JSON file is expected to be a list whose entries expose
    ``entry['data']['list']``, a list of record dicts. A record is downloaded
    when it has both ``filePath`` and ``duration`` and the duration is above
    30 (presumably seconds -- TODO confirm). The file is saved as
    ``<custId>_<duration>.wav``.

    Download and write failures are logged and do not abort the run.

    Args:
        file_path: Path of the JSON export to read.
    """
    datas = jload(file_path)

    # NOTE(review): only the first top-level entry is processed (`[:1]`), and
    # the `break` at the end of the inner loop stops after the first record.
    # Both look like debugging limits; they are kept so behavior is unchanged.
    for data in tqdm.tqdm(datas[:1]):
        for case in tqdm.tqdm(data['data']['list']):
            if 'filePath' in case and 'duration' in case and case['duration'] > 30:
                custId = case['custId']
                wav_url = 'https://ics-tmscall.aibank.com/upload-service/upload/' + case['filePath']
                target_path = './baixin_audio/' + str(custId) + '_' + str(case['duration']) + '.wav'
                try:
                    response = requests.get(wav_url)
                    # Do not save an HTTP error page under a .wav name.
                    response.raise_for_status()
                    with open(target_path, 'wb') as fp:
                        fp.write(response.content)
                except Exception:
                    # Best-effort download: report the failure and carry on.
                    logging.exception('Failed to download recording %s', wav_url)
            break



def fix_audio():
    """Convert every file in ``./baixin_audio/`` to a mono 8 kHz WAV file.

    Each input is loaded with ``format="alaw"``, converted to one channel at
    an 8000 Hz frame rate, and exported under the same base name into
    ``./baixin_audio_fix/``.
    """
    for file_name in tqdm.tqdm(glob.glob('./baixin_audio/*')):
        # Load the A-law encoded input.
        audio = AudioSegment.from_file(file_name, format="alaw")

        # Normalise to mono, 8 kHz.
        pcm_audio = audio.set_channels(1).set_frame_rate(8000)

        # basename() copes with whichever path separator glob returns,
        # unlike splitting on '/'.
        out_path = './baixin_audio_fix/' + os.path.basename(file_name)

        # export() returns the open output file object; close it so handles
        # do not pile up while looping over many recordings.
        pcm_audio.export(out_path, format="wav").close()


def baixin_pass():
    """Label transcript lines with NLU intents and save them to an Excel file.

    Every file under ``./lx_v0.8_trans/`` is read line by line, skipping its
    first line. Lines containing ``声道1`` are treated as user turns: the text
    (without the ``声道1：`` prefix) is sent to the NLU service and the returned
    ``standard_query`` is stored in the result column. The column is left
    empty for a missing or ``NOINTENT`` intent and set to ``报错`` when the
    request or response parsing fails. Lines containing ``声道0`` are stored
    as salesperson turns without a result. A blank row separates files. The
    workbook is written to ``./百信_10_30.xlsx``.
    """
    wb = Workbook()
    ws = wb.active
    ws.append(['角色', '文本', '结果'])

    # NOTE(review): the utterance is interpolated into the query string
    # without escaping; text containing '&' or '#' would be cut short.
    nlu_url = 'http://8.142.85.77:8679/nlu?session_id=-1&workspace=222&current_query={}'
    for file_name in tqdm.tqdm(glob.glob('./lx_v0.8_trans/*')):
        with open(file_name, 'r') as fp:
            datas = fp.readlines()

        for data in tqdm.tqdm(datas[1:]):
            # Strip only the line terminator. Slicing off the last character
            # would eat real text on a final line that has no newline.
            text = data.rstrip('\n')

            if '声道1' in text:
                role = '用户'
                try:
                    response = requests.get(nlu_url.format(text.replace('声道1：', '')))
                    result = json.loads(response.text)
                    standard_query = result['standard_query']
                    if standard_query and standard_query != 'NOINTENT':
                        intent_name = standard_query
                    else:
                        intent_name = ''
                except Exception:
                    # Keep the row, flag it as failed, and log the cause.
                    logging.exception('NLU request failed for line: %s', text)
                    intent_name = '报错'
                ws.append([
                    role, text, intent_name
                ])

            elif '声道0' in text:
                role = '销售员'
                ws.append([
                    role, text
                ])

        # Blank row between two transcripts.
        ws.append([
            '', '', ''
        ])

    wb.save('./百信_10_30.xlsx')

import time
import asyncio

def task():
    """Time one start call and one process call against the dialogue service.

    Prints, in order: the elapsed seconds of the start request, its decoded
    JSON body, the decoded JSON body of the process request (sent with the
    query ``是的``), and the elapsed seconds of the process request.
    """
    start_url = 'http://8.142.69.133:18635/dialogue/start?workspaceId=351&dialogueId=2332&faqId=222&sessionId=1-2144-833777324-1714526145152471042-1360&userId=833777324'
    process_url = 'http://8.142.69.133:18635/dialogue/process?workspaceId=351&dialogueId=2332&faqId=222&sessionId=1-2144-833777324-1714526145152471042-1360&userId=833777324&query={}&is_try=false&breakParam='

    # First round trip: open the dialogue session.
    before_start = time.time()
    start_response = requests.get(start_url)
    after_start = time.time()
    print(after_start - before_start)
    print(json.loads(start_response.text))

    # Second round trip: send one user utterance.
    process_response = requests.get(process_url.format('是的'))
    after_process = time.time()
    print(json.loads(process_response.text))

    print(after_process - after_start)

if __name__ == '__main__':

    # Manual entry point: steps are switched on and off by (un)commenting
    # them. Only the dialogue-service timing check is enabled at the moment.

    # baixin_wav()
    # fix_audio()
    # baixin_pass()

    task()


